**********
//Analysis of Better Together data received 5 Sep 2022
//Date analysis performed: 1/31/23; 1/16/23; 9/27/22; 9/26/22; 9/23/22
//Author: Marta Wilson-Barthes
**********

clear all
capture log close
macro drop _all
set more off, permanently
set linesize 255

//set global directories 
pwd
global folder "P:\Omar Projects\Better Together R01 Proposal\Data files"
global sourcedir "$folder\Source"
global outputdir "$folder\Output"
global codedir "$folder\Code"

log using "$outputdir/BetterTogetherDescrpAnalysis.log", replace

//change directory to source folder
cd "$sourcedir"	
pwd


******************Merging the response and sociodemigraphic data for cases and controls to generate one dataset**************

*Read in the Excel Cases - Measures data and save as a .dta file 
	import excel "Better together quantitative data_CASES_updated_26.01.2023.xlsx", sheet("Sheet1") firstrow clear
	save "$sourcedir\BTCase_measures.dta", replace

*how many obs in the dataset
	codebook PIDCASES //20 records
	
*missing report for all vars
	mdesc
	drop BL-BW //drop empty values that occured due to formatting of csv file
	mdesc
	save "$sourcedir\BTCase_measures.dta", replace

*For CASES, merge the measure data with the sociodemigraphic data by PID
	rename PIDCASES PID
	sort PID
	save "$sourcedir\BTCase_measures.dta", replace
	use "$sourcedir\BTCase_demo.dta", clear
	sort PID
	merge 1:1 PID using "$sourcedir\BTCase_measures.dta" //all 20 obs matched
	drop _merge

*assign a treatment dummy indicator to the data before merging with controls
	gen tx = 1
	save "$sourcedir\BTCase_all.dta", replace

*data cleaning 
	drop Clinicname Interviewer Starttime Telcellno Endtime //drop any potentially identifying data 
	replace Biologicalfather = "" if Biologicalfather=="n/a"
	destring Biologicalfather, replace ignore(" ")
	rename O biofather_other
	
	*rename select vars to so that var names for CASES data match var names for CONTROL data
	rename TOT_SCORE TOTSCORE
	rename CATIS_FINTOT13 FINTOT13
	rename BSCIY_TSCORES BSCIY_SCALEDSCORES
	rename BAI_Y_TSCORES BAI_Y_SCALEDSCORES
	rename BDIYSCALEDSCORES BDIYSCALED
	rename BANIYTSCORES BANIYSCALED
	rename BDBIYTSCORES BDBIYSCALED 
	save "$sourcedir\BTCase_all.dta", replace

*Read in the Excel Controls - Measures data and save as a .dta file 
	import excel "$sourcedir\Better Together_Quantitative data_CONTROLS_updated_26.01.2023.xlsx", sheet("Sheet 1") firstrow clear
	rename PIDCONTROLS PID

*how many obs in the CONTROLS dataset
	codebook PID //38 records
	
*missing report for all vars
	mdesc
	drop BL BM
	drop if PID==""
	mdesc
	save "$sourcedir\BTControl_measures.dta", replace

*For CONTROLS, merge the measure data with the sociodemigraphic data by PID
	sort PID
	save "$sourcedir\BTControl_measures.dta", replace
	use "$sourcedir\BTControl_demo.dta", clear
	sort PID
	merge 1:1 PID using "$sourcedir\BTControl_measures.dta" //all 38 obs matched
	drop _merge
	
*assign a treatment dummy indicator to the data before merging with cases
	gen tx = 0
	save "$sourcedir\BTControl_all.dta", replace

*Some PIDs are the same between cases and controls - attach a "C" to PIDs for controls befor merging 
	gen PID2 = "C-"+ PID
	drop PID
	rename PID2 PID

*data cleaning 
	drop Interviewer Starttime Telcellno Endtime Clinicname
	replace Biologicalfather = "" if Biologicalfather=="n/a"
	destring Biologicalfather, replace ignore(" ")
	rename O biofather_other
	
	save "$sourcedir\BTControl_all.dta", replace	
	
*Append the combined CONTROLS data to the combined CASES data 
	append using "$sourcedir\BTCase_all.dta"
	save "$sourcedir\BT_all.dta", replace	

	
	

******************Generate Descriptive Stats Table, Overall and By Treatment Arm**************
use "$sourcedir\BT_all.dta", clear

*gen numeric household size var
	gen hh_size = substr(Householdsize,1,2)
	destring hh_size, generate(hh_size_n)
	drop hh_size
	
*label vars for readability
rename TOTSCORE CDRISC_TOTAL
label var CDRISC_TOTAL "CDRISC - Total Score"
rename FINTOT13 CATIS_MEAN
label var CATIS_MEAN "CATIS - Mean Score"
rename TOT BERGER_TOTAL
label var BERGER_TOTAL "Berger Scale - Total Score"

label var tx "Participating in Better Together Group Intervention"
capture label define tx_lb 0 "No Better Together group participation" 1 "Has attended at >= 5 Better Together group sessions"
label values tx tx_lb

label var BSCIY_RAW	"Beck Self-Concept Inventory for Youth - Raw Score"
label var BSCIY_SCALEDSCORES "Beck Self-Concept Inventory for Youth - T-Score"

label var BDBIYRAW "Beck Disruptive Inventory for Youth - Raw Score"
label var BDBIYSCALED "Beck Disruptive Inventory for Youth - T-Score"

label var BANIYRAW "Beck Anger Inventory for Youth - Raw Score"
label var BANIYSCALED "Beck Anger Inventory for Youth - T-Score"

label var BDIYRAW "Beck Depression Inventory for Youth - Raw Score"
label var BDIYSCALED "Beck Depression Inventory for Youth - T-Score"

label var BAI_Y_RAW "Beck Anxiety Inventory for Youth - Raw Score"
label var BAI_Y_SCALEDSCORES "Beck Anxiety Inventory for Youth - T-Score"

capture label define gender_lb 0 "Female" 1 "Male" 2 "Other (e.g., transsexual or intersex)"
label values Gender gender_lb

capture label define race_lb 0 "Black African" 1 "Coloured" 2 "Indian or Asian" 3 "White" 4 "Other"
label values Race race_lb

capture label define lang_lb 0 "IsiXhosa" 1 "Afrikaans" 2 "English" 3 "Other" 
label values Language lang_lb
replace Language = 3 if Language==.

capture drop education_n num_educ educ_bin
gen education_n =  substr(Education,1,2)
	sort education_n
	replace education_n = "9" if Education=="Level3" //assumes Level 3 = 9th/10th grade
	replace education_n = "15" if education_n=="Fi" //assumes First Year of college

*create continuous and binary education var
destring education_n, generate(num_educ)
gen educ_bin = 0 if num_educ < 8 //primary
replace educ_bin = 1 if num_educ >=8 // secondary

label var hh_size_n "Household Size (Adults and Children)"

capture label define house_lb 1 "Formal" 2 "Informal"
label values Typeofhousing house_lb

label var Biologicalmother "Is Biological Mother Alive?"
capture label define mom_lb 0 "No" 1 "Yes" . "Unknown"
label values Biologicalmother mom_lb

label var Biologicalfather "Is Biological Father Alive?"
capture label define dad_lb 0 "No" 1 "Yes" . "Unknown"
label values Biologicalfather dad_lb

label var Governmentsocialgrant "Are you accessing any government social grant?"
capture label define grant_lb 1 "Yes" 0 "No"
label values Governmentsocialgrant grant_lb

*for HIV Stigma scale, some responses are 999 --> replace with missing
	foreach v of varlist HIVSTIGMA_1-HIVSTIGMA_TOT {
		replace `v' = . if `v' == 999
		}

		table1_mc, 	by(tx) 							/// 
			vars(		Age contn %4.2f \  			///
						Gender cate \				///
						Race cate \  				///
						Language cate \ 			///
						educ_bin cate \				///
						hh_size_n contn %4.2f \		///
						Typeofhousing cate \		///
						Biologicalmother cate \		///
						Biologicalfather cate \		///
						Governmentsocialgrant cate \ ///
						CDRISC_TOTAL contn %4.2f \	///
						CATIS_TOT contn %4.2f \		///
						CATIS_MEAN contn %4.2f \	///
						BERGER_TOTAL contn %4.2f \	///
						HIVSTIGMA_TOT contn %4.2f \	///
						BSCIY_RAW contn %4.2f \		///
						BSCIY_SCALEDSCORES contn %4.2f \	///
						BAI_Y_RAW contn %4.2f \		///
						BAI_Y_SCALEDSCORES contn %4.2f \	///
						BDIYRAW contn %4.2f \		///
						BDIYSCALED contn %4.2f \	///
						BANIYRAW contn %4.2f \		///
						BANIYSCALED contn %4.2f \	///
						BDBIYRAW contn %4.2f \		///
						BDBIYSCALED contn %4.2f)  nospace percent_n onecol missing test total(before) saving("P:\Omar Projects\Better Together R01 Proposal\Data files\Output\table 1.xlsx", replace)			

label var tx "Participation in Better Together Peer Group Intervention"
capture label define tx_ll 0 "0 peer groups attended" 1 ">= 5 peer groups attended"
label values tx tx_ll						

set scheme s1mono
graph bar 	BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED, over(tx) yline(55) b1title("Number of Better Together Peer Groups Attended at Study Enrollment") ytitle("Mean T-Score") ///
			legend(label(1 "mean_Self-Concept T-Score") label(2 "mean_Anxiety T-Score") label(3 "mean_Depression T-Score") label(4 "mean_Anger T-Score") label(5 "mean_Disruptive T-Score") size(small)) asyvars

graph bar 	CDRISC_TOTAL CATIS_TOT BERGER_TOTAL HIVSTIGMA_TOT, over(tx) b1title("Number of Better Together Peer Groups Attended at Study Enrollment") ytitle("Mean Total Score") ///
			legend(label(1 "Resilience (CD-RISC 10)") label(2 "Attitude towards illness (CATIS)") label(3 "Chronic disease stigma (Revised Berger Scale)") label(4 "HIV stigma (ALHIV-SS)") size(*.6)) asyvars
			
									
******************Multivariate regression analysis comparing psychosocial measure outcomes between treatment and control groups**************
tab tx HIVSTIGMA_TOT
//correlation between outcomes
pwcorr CDRISC_TOTAL CATIS_TOT CATIS_MEAN BERGER_TOTAL HIVSTIGMA_TOT BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED, sig star(.05) obs bon
pwcorr BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED, sig star(.05) obs bon

			
*****MANOVA is used to model two or more (potentially interrelated) dependent variables that are continuous with one or more categorical predictor variables.*****

*manova requires independent vars to be categorical
gen age_bin = 1 if Age >=18 & Age !=.
replace age_bin = 0 if Age <18 & Age !=.

*can we assume dependent variables are normally distributed?
mvtest normality CDRISC_TOTAL CATIS_TOT BERGER_TOTAL HIVSTIGMA_TOT BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED tx, univariate stats(all) 

manova CDRISC_TOTAL CATIS_TOT BERGER_TOTAL HIVSTIGMA_TOT BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED = i.tx  
mvreg

//One-way manova model presented in paper:
manova CDRISC_TOTAL CATIS_TOT BERGER_TOTAL BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED = i.tx  
mvreg

anova HIVSTIGMA_TOT tx

//multi-way manova model 
manova CDRISC_TOTAL CATIS_TOT BERGER_TOTAL BSCIY_SCALEDSCORES BAI_Y_SCALEDSCORES BDIYSCALED BANIYSCALED BDBIYSCALED = i.tx c.age_bin c.Gender c.educ_bin
mvreg 


********Logistic regression analyses for outcome of screened positive for Depression based on BDI score***************
///According to Beck et al. (1988), the Center for Cognitive Therapy has set the following guidelines for BDI cut-off scores to be used with affective disorder patients: ///
///scores from 0 through 9 indicate no or minimal depression; scores from 10 through 18 indicate mild to moderate depression; ///
///scores from 19 through 29 indicate moderate to severe depression; and scores from 30 through 63 indicate severe depression. ///
///https://www.sciencedirect.com/topics/medicine-and-dentistry/beck-depression-inventory
///https://www.slu.edu/medicine/family-medicine/pdfs/beck-depression-inventory-ii.pdf
///https://journals.sagepub.com/doi/pdf/10.1177/2047487314527851 <-- cut off score = 10
///https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4119288/ <-- "Furthermore, our study suggests a score of ≥13 in BDI would be appropriate as a screening cut-off score, whereas a score of ≥17 would be more appropriate for diagnostic use.


sum BDIYSCALED, detail
sum BDIYRAW, detail
capture drop depression_bin_screen
gen depression_bin_screen = . 
replace depression_bin_screen = 0 if BDIYRAW < 19 //screened negative for depression 
replace depression_bin_screen = 1 if BDIYRAW >= 19 //screened positive for depression 
capture label define dep_lb 0 "Screened negative for depression" 1 "Screened positive for depression"
label values depression_bin_screen dep_lb

gen hh_type = 1 if Typeofhousing==2 //informal
replace hh_type = 0 if Typeofhousing==1 //formal

gen sex = 1 if Gender == 1
replace sex = 0 if Gender == 0
replace sex = . if Gender == 2

logistic depression_bin_screen i.tx
estimates store m1, title (model 1 dep)

logistic depression_bin_screen i.tx Age i.sex i.Typeofhousing hh_size_n i.Biologicalmother i.Governmentsocialgrant
estimates store m2, title (model 2 adep)


********Logistic regression analyses for outcome of screened positive for Anxiety based on BAI score***************
///https://www.sciencedirect.com/science/article/pii/S0887618505000666: The optimal cut-off for the BAI was determined to be a score of 20, which had a sensitivity of .67 and a specificity of .93.
///https://www.jolietcenter.com/storage/app/media/beck-anxiety-inventory.pdf

gen anxiety_bin_screen = . 
replace anxiety_bin_screen = 0 if BAI_Y_RAW < 20 //screened negative for anxiety 
replace anxiety_bin_screen = 1 if BAI_Y_RAW >= 20 //screened positive for anxiety 
capture label define anx_lb 0 "Screened negative for anxiety" 1 "Screened positive for anxiety"
label values anxiety_bin_screen anx_lb

logistic anxiety_bin_screen i.tx
estimates store m3, title (model 3 anx)

logistic anxiety_bin_screen i.tx Age i.sex i.Typeofhousing hh_size_n i.Biologicalmother i.Governmentsocialgrant
estimates store m4, title (model 4 aanx)

**combine regression outputs for depression and anxiety 
esttab m1 m2 m3 m4 using "P:\Omar Projects\Better Together R01 Proposal\Data files\Output\table 2.csv",replace stats(n chi2 bic, star(chi2)) eform se starl( * 0.10 ** 0.05 *** 0.010) 

**margins command
logistic depression_bin_screen i.tx Age i.sex i.Typeofhousing hh_size_n i.Biologicalmother i.Governmentsocialgrant
margins, dydx(tx) atmeans
logistic anxiety_bin_screen i.tx Age i.sex i.Typeofhousing hh_size_n i.Biologicalmother i.Governmentsocialgrant
margins, dydx(tx) atmeans

//make coef plot of odds ratios
logistic depression_bin_screen i.tx Age i.Gender i.educ i.Biologicalmother i.Governmentsocialgrant
estimates store Depression1
logistic anxiety_bin_screen i.tx Age i.Gender i.educ i.Biologicalmother i.Governmentsocialgrant
estimates store Anxiety1
coefplot Depression1 Anxiety1, xline(1) eform coeflabels(, wrap(20)) drop(_cons)

log close
clear
